"""
修正微软拼音输入法无法添加多个格式化自定义短语的问题
Author: Scruel Tao
"""
import os
import re
import pathlib
import traceback
from pathlib import Path

# Customization: define the custom phrases below, one entry per line, in the
# format <pinyin position phrase>. The phrase itself may safely contain spaces
# (main() splits each line on whitespace at most twice).
# Alternatively, create a phrases.txt in the same directory as this script and
# list custom phrases there in the same format; main() appends its content to
# the entries defined here.
PHRASES_TEXT = """
dcr 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
dcrenl 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
time 1 %yyyy%%MM%%dd%%HH%%mm%%ss%
time 2 %yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
date 1 %yyyy%年%MM%月%dd%日 %HH%时%mm%分%ss%秒
""".strip()

# Path of the custom-phrase lexicon file this script reads and rewrites; it is
# resolved below the directory named by the APPDATA environment variable.
LEX_FILE = os.path.join(os.getenv('APPDATA'),
                        r'Microsoft\InputMethod\Chs\ChsWubiEUDPv1.lex')

# Byte offsets of the three consecutive 4-byte little-endian fields that main()
# patches in the file header after rewriting the phrases.
HEADER_LEN = 16 + 4
PHRASE_64PCNT_POS = HEADER_LEN  # main() stores 64 + 4 * phrase count here
TOTAL_BYTES_POS = HEADER_LEN + 4  # main() stores the total size it computed here
PHRASE_CNT_POS = HEADER_LEN + 8  # main() reads/stores the phrase count here

# Encoding used by padded_bytes() for pinyin and phrase text.
PADDED_ENCODING = 'utf-16le'
# First bytes written when main() creates a new lex file: the 8 ASCII bytes
# 'mschxudp' followed by four characters encoded as UTF-16LE (8 more bytes).
HEADER_BYTES = bytes('mschxudp', encoding='ascii')
HEADER_BYTES = HEADER_BYTES + bytes('\x02\x60\x01\x00', PADDED_ENCODING)
# Two zero bytes written after the pinyin and again after the phrase of every
# entry.
PHRASE_SEPARATOR_BYTES = b'\x00\x00'
PHRASE_SEPARATOR_SIZE = len(PHRASE_SEPARATOR_BYTES)
# Offset at which main() truncates the file and starts writing the table of
# 4-byte cumulative phrase offsets, followed by the phrase blocks themselves.
PHRASE_LEN_FIRST_POS = PHRASE_CNT_POS + 40

# Final two bytes of every 16-byte phrase header built by get_phrase_header().
# This is mutable module state: main() replaces it with bytes 14:16 of the
# phrase blocks it finds in an existing lex file.
phrase_fixed_last_bytes = b'\xA5\x2C'

def read_bytes(position, length=1):
    """Read raw bytes from the lex file.

    Args:
        position: Absolute byte offset in ``LEX_FILE`` at which to start.
        length: Number of bytes to read. A negative value reads everything
            from ``position`` to the end of the file.

    Returns:
        The bytes read; fewer than ``length`` (possibly none) when the file
        ends first.
    """
    # Read-only mode: this helper never writes, so it must not demand write
    # access to the file just to look at it.
    with open(LEX_FILE, 'rb') as file:
        file.seek(position)
        return file.read(length)


def replace_bytes(position, value):
    """Overwrite bytes of the lex file in place.

    Args:
        position: Absolute byte offset in ``LEX_FILE`` at which to write.
        value: Bytes that replace the ``len(value)`` bytes starting at
            ``position``. Bytes after that range are left untouched; if the
            range runs past the end of the file, the file grows.
    """
    # Writing in 'rb+' mode overwrites only the bytes covered by ``value``, so
    # there is no need to read the rest of the file and write it back.
    with open(LEX_FILE, 'rb+') as file:
        file.seek(position)
        file.write(value)


def bytes2int(data):
    """Decode ``data`` as an unsigned little-endian integer.

    Args:
        data: Bytes-like value, least significant byte first.

    Returns:
        The decoded non-negative integer (0 for empty input).
    """
    value = int.from_bytes(data, 'little')
    return value


def int2bytes(data, length=1):
    """Encode an integer as unsigned little-endian bytes.

    Args:
        data: Non-negative integer to encode.
        length: Exact number of bytes to produce.

    Returns:
        ``length`` bytes, least significant byte first.

    Raises:
        OverflowError: If ``data`` is negative or does not fit in ``length``
            bytes.
    """
    encoded = int.to_bytes(data, length, 'little')
    return encoded


def padded_bytes(s):
    """Encode ``s`` character by character using ``PADDED_ENCODING``.

    Any character whose encoded form is a single byte gets one zero byte
    appended, so every character occupies at least two bytes in the result.

    Args:
        s: Text to encode.

    Returns:
        The concatenated encoded characters as ``bytes``.
    """
    chunks = []
    for char in s:
        encoded = bytes(char, PADDED_ENCODING)
        if len(encoded) == 1:
            # Widen a one-byte encoding to two bytes
            encoded = encoded + b'\x00'
        chunks.append(encoded)
    return b''.join(chunks)


def get_phrase_header(header_pinyin_len, index):
    """Build the 16-byte header that precedes one phrase entry.

    Args:
        header_pinyin_len: Value stored as a 2-byte little-endian field;
            main() passes 16 + the encoded pinyin length + the separator size.
        index: Value stored as a single byte; main() passes the position
            number parsed from the phrase line.

    Returns:
        The header bytes, ending with the current module-level
        ``phrase_fixed_last_bytes``.
    """
    fields = (
        b'\x10\x00\x10\x00',
        int2bytes(header_pinyin_len, 2),
        int2bytes(index),
        b'\x06\x00\x00\x00\x00',
        b'\x00\x00',
        phrase_fixed_last_bytes,
    )
    return b''.join(fields)

def _load_phrase_items():
    """Return the unique, non-blank phrase lines in first-seen order.

    Lines come from ``PHRASES_TEXT`` followed by the optional ``phrases.txt``
    located next to this script.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    phrases_file = Path(current_dir) / 'phrases.txt'
    phrases_text = PHRASES_TEXT
    if phrases_file.exists():
        try:
            # utf-8-sig also reads plain UTF-8 and drops a leading BOM, which
            # would otherwise end up glued to the first pinyin.
            phrases_file_text = phrases_file.read_text('utf-8-sig')
        except UnicodeDecodeError:
            phrases_file_text = phrases_file.read_text('gbk')
        phrases_text += '\n' + phrases_file_text.replace('\r\n', '\n')
    stripped_lines = (line.strip() for line in phrases_text.split('\n'))
    # dict.fromkeys removes duplicates while keeping the input order, so the
    # relative order of entries sharing a pinyin is the same on every run.
    return list(dict.fromkeys(line for line in stripped_lines if line))


def _split_terminated(data, start):
    """Return ``(payload, next_pos)`` for the terminated string at ``start``.

    The scan advances in separator-sized steps, so a zero byte pair that
    straddles two encoded characters is never mistaken for the terminator.
    """
    pos = start
    while pos + PHRASE_SEPARATOR_SIZE <= len(data):
        if data[pos:pos + PHRASE_SEPARATOR_SIZE] == PHRASE_SEPARATOR_BYTES:
            return data[start:pos], pos + PHRASE_SEPARATOR_SIZE
        pos += PHRASE_SEPARATOR_SIZE
    raise ValueError(
        f'Unterminated string at offset {start} of a phrase block')


def _read_existing_phrases():
    """Parse the phrase blocks already stored in ``LEX_FILE``.

    Returns:
        ``(phrases, header_tail)`` where ``phrases`` is a list of
        ``(0, pinyin_bytes, header_bytes, phrase_bytes)`` tuples for the
        entries that are kept, and ``header_tail`` is bytes 14:16 of the last
        block read (``None`` when the file holds no phrases).
    """
    with open(LEX_FILE, 'rb') as file:
        data = file.read()

    phrase_cnt = bytes2int(data[PHRASE_CNT_POS:PHRASE_CNT_POS + 4])
    # The table at PHRASE_LEN_FIRST_POS holds one 4-byte end offset for every
    # phrase except the last; the phrase blocks follow immediately after it.
    blocks_start = PHRASE_LEN_FIRST_POS + 4 * (phrase_cnt - 1)

    phrases = []
    header_tail = None
    block_begin = 0
    for i in range(phrase_cnt):
        if i == phrase_cnt - 1:
            # The last block has no table entry; it runs to the end of file.
            block = data[blocks_start + block_begin:]
        else:
            entry_pos = PHRASE_LEN_FIRST_POS + i * 4
            block_end = bytes2int(data[entry_pos:entry_pos + 4])
            block = data[blocks_start + block_begin:blocks_start + block_end]
            block_begin = block_end
        # After the 16-byte header come the pinyin and the phrase, each
        # followed by a separator.
        pinyin_bytes, phrase_pos = _split_terminated(block, 16)
        phrase_bytes, _ = _split_terminated(block, phrase_pos)
        header_tail = block[14:16]
        # Keep only entries whose header byte 9 is zero; per the original
        # author's note the others are deleted phrases.
        if block[9:10] == b'\x00':
            phrases.append((0, pinyin_bytes, block[:16], phrase_bytes))
    return phrases, header_tail


def main():
    """Rewrite the lex file so it contains the configured custom phrases.

    Existing phrases are read from ``LEX_FILE`` (a fresh file is created when
    it is missing). Existing entries whose pinyin matches a configured phrase
    are replaced by the configured ones. All entries are then sorted by their
    encoded pinyin and written back, and the count/size fields of the file
    header are updated.

    Raises:
        ValueError: If a phrase line is not of the form
            ``<pinyin> <position> <phrase>`` or an existing phrase block
            cannot be parsed.
    """
    global phrase_fixed_last_bytes

    phrase_items = _load_phrase_items()

    print(f"==================\n"
          f"Author: Scruel Tao\n"
          f"==================\n\n"
          f"正在修正巨硬拼音并添加\n"
          f"预置的日期格式化短语……\n"
          f"\n"
          f"短语数量：{len(phrase_items)}\n"
          )

    phrase_list = []  # (is_new, pinyin, header, phrase)

    if not os.path.exists(LEX_FILE):
        lex_dir = os.path.dirname(LEX_FILE)
        if lex_dir:
            os.makedirs(lex_dir, exist_ok=True)
        with open(LEX_FILE, 'wb') as f:
            # Initialise an empty lex file: the fixed header followed by
            # placeholder fields up to PHRASE_LEN_FIRST_POS.
            f.write(HEADER_BYTES)
            f.write((b'\x40' + b'\x00' * 3) * 3)
            f.write(b'\x00' * 4)
            f.write(b'\x38\xd2\xa3\x65')
            f.write(b'\x00' * 32)
    else:
        phrase_list, header_tail = _read_existing_phrases()
        if header_tail is not None:
            # New headers reuse the trailing two header bytes found in the
            # existing file.
            phrase_fixed_last_bytes = header_tail

    # Build the configured phrases
    new_entries = []
    for item in phrase_items:
        fields = item.split(maxsplit=2)
        if len(fields) != 3:
            raise ValueError(
                'Malformed phrase line, expected '
                f'"<pinyin> <position> <phrase>": {item!r}')
        pinyin, index, phrase = fields
        pinyin_bytes = padded_bytes(pinyin)
        header = get_phrase_header(
            16 + len(pinyin_bytes) + PHRASE_SEPARATOR_SIZE, int(index))
        new_entries.append((1, pinyin_bytes, header, padded_bytes(phrase)))

    # Configured phrases replace every existing entry with the same pinyin.
    overridden = {entry[1] for entry in new_entries}
    phrase_list = [entry for entry in phrase_list
                   if entry[1] not in overridden]
    phrase_list.extend(new_entries)

    # Necessary fix, otherwise the order of phrases will be messed up.
    phrase_list.sort(key=lambda x: x[1])

    # Write the offset table followed by the phrase blocks
    next_offset = 0
    total_size = PHRASE_LEN_FIRST_POS
    with open(LEX_FILE, 'rb+') as file:
        file.seek(PHRASE_LEN_FIRST_POS)
        file.truncate()
        # One cumulative end offset per phrase except the last.
        for _, *parts in phrase_list[:-1]:
            next_offset += sum(map(len, parts)) + PHRASE_SEPARATOR_SIZE * 2
            offset_entry = int2bytes(next_offset, length=4)
            file.write(offset_entry)
            total_size += len(offset_entry)
        for _, pinyin_bytes, header, phrase_bytes in phrase_list:
            file.write(header)
            # The trailing empty item makes join() terminate the phrase too.
            data_bytes = PHRASE_SEPARATOR_BYTES.join(
                [pinyin_bytes, phrase_bytes, b''])
            file.write(data_bytes)
            total_size += len(header) + len(data_bytes)

    # Fix file header
    replace_bytes(PHRASE_64PCNT_POS, int2bytes(
        64 + len(phrase_list) * 4, length=4))
    replace_bytes(PHRASE_CNT_POS, int2bytes(len(phrase_list), length=4))
    replace_bytes(TOTAL_BYTES_POS, int2bytes(total_size, length=4))


if __name__ == "__main__":
    try:
        main()
        print('Done')
    except:
        traceback.print_exc()
    os.system('pause')